from sentence_transformers import SentenceTransformer

# Embedding dimensionality of bge-small-zh. Unused in the visible lines;
# presumably consumed further down the file — TODO confirm.
embedding_dim = 512
# Load the BGE small-Chinese sentence-embedding model from a local
# checkpoint directory (no network download at runtime).
embedding = SentenceTransformer('./data/BAAI/bge-small-zh')

# Sample Chinese text used as tokenizer input (kept verbatim).
text = '“警”此一生，守护平安'

# Tokenization: get the raw input ids WITHOUT special tokens
# ([CLS]/[SEP] are suppressed by add_special_tokens=False), then map
# the ids back to their token strings for inspection.
ids = embedding.tokenizer(text, add_special_tokens=False)['input_ids']
tokens = embedding.tokenizer.convert_ids_to_tokens(ids)

